#!/bin/env python
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

#----------
# Parameters
#----------
ncluster=3
caps = "abcdefghijklmn"

#----------
# Subroutine
#----------
def linreg(X,Y):
    """
     return a,b in solution to y = ax + b such that root mean square distance
     between trend line and original points is minimized
    """
    N = len(X)
    Sx = Sy = Sxx = Syy = Sxy = 0.0

    for x, y in zip(X, Y):
        Sx = Sx + x
        Sy = Sy + y
        Sxx = Sxx + x*x
        Syy = Syy + y*y
        Sxy = Sxy + x*y
    det = Sxx * N - Sx * Sx
    aa = (Sxy * N - Sy * Sx)/det
    bb = (Sxx * Sy - Sx * Sxy)/det
    YY2 = aa * X + bb
    return X, YY2, aa

def mk_test(x, alpha = 0.05):
    from scipy.stats import norm
    """
    this perform the MK (Mann-Kendall) test to check if the trend is present in
    data or not

    Input:
        x:   a vector of data
        alpha: significance level

    Output:
        h: True (if trend is present) or False (if trend is absence)
        p: p value of the sifnificance test

    Examples
    --------
      >>> x = np.random.rand(100)
      >>> h,p = mk_test(x,0.05)  # meteo.dat comma delimited
    """
    n = len(x)

    # calculate S
    s = 0
    for k in range(n-1):
        for j in range(k+1,n):
            s += np.sign(x[j] - x[k])

    # calculate the unique data
    unique_x = np.unique(x)
    g = len(unique_x)

    # calculate the var(s)
    if n == g: # there is no tie
        var_s = (n*(n-1)*(2*n+5))/18
    else: # there are some ties in data
        tp = np.zeros(unique_x.shape)
        for i in range(len(unique_x)):
            tp[i] = sum(unique_x[i] == x)
        var_s = (n*(n-1)*(2*n+5) + np.sum(tp*(tp-1)*(2*tp+5)))/18

    if s>0:
        z = (s - 1)/np.sqrt(var_s)
    elif s == 0:
        z = 0
    elif s<0:
        z = (s + 1)/np.sqrt(var_s)

    # calculate the p_value
    p = 2*(1-norm.cdf(abs(z))) # two tail test
    h = abs(z) > norm.ppf(1-alpha/2)

    return h, p

#----------
# Read Data
#----------
indir1 = "./Data/Observations"
indir2 = "./Data/HiFLOR_AllForc"
indir3 = "./Data/HiFLOR_NatForc"
indir4 = "./Data/SPEAR_AllForc"
indir5 = "./Data/SPEAR_NatForc"

tcnum={}
tcnum_hiflor_all={}
tcnum_hiflor_nat={}
tcnum_spear_all={}
tcnum_spear_nat={}

for cl in range(1,ncluster+1,1):
    print ("read data cl=",cl)
    
    #--Frequency Time series (Obs)
    datafl = "%s/timeseries_each.json" % (indir1)
    tcnum[cl] = pd.read_json(datafl)
    
    #--Frequency Time series (HiFLOR AllFloc)
    datafl2 = "%s/timeseries_each.json" % (indir2)
    tcnum_hiflor_all[cl] = pd.read_json(datafl2)

    #--Frequency Time series (HiFLOR NatForc)
    datafl3 = "%s/timeseries_each.json" % (indir3)
    tcnum_hiflor_nat[cl] = pd.read_json(datafl3)
    
    #--Frequency Time series (SPEAR AllFloc)
    datafl4 = "%s/timeseries_each.json" % (indir4)
    tcnum_spear_all[cl] = pd.read_json(datafl4)
    
    #--Frequency Time series (SPEAR NatForc)
    datafl5 = "%s/timeseries_each.json" % (indir5)
    tcnum_spear_nat[cl] = pd.read_json(datafl5)

#----------
# Plot
#----------
fig = plt.figure(figsize=(8,12)) # set figure environemnt

regname={
1: "Eastern Japan",
2: "Central Japan",
3: "Western Japan",
}

for cl in range(1,ncluster+1,1):
    
   #----------------
   # Subplot
   #----------------
    ax1 = fig.add_subplot(3, 1, cl) # set up panel plot
    
   #----------------
   # Time Series
   #----------------
    
   #--model (HiFLOR AllForc)
    syear=1977
    eyear=2050

    data_ = tcnum_hiflor_all[cl][(tcnum_hiflor_all[cl]["cl"]==cl)&(tcnum_hiflor_all[cl]["year"]<=eyear)&(tcnum_hiflor_all[cl]["year"]>=syear)]['freq'].to_numpy()
    years = np.arange(syear,eyear+1,1)
    ymax = len(years)
    tmax = len(data_)
    emax = int(tmax/ymax)
    allens = data_.reshape((emax,ymax))
    ensmean = allens.mean(axis=0)
    
    r90 = np.ma.zeros((ymax,2)) # 90% range
    for tt in range(ymax):
        xx = np.array(allens[:,tt])
        r90[tt,:] = stats.poisson.interval(0.99, xx.mean(), loc=0)

    plt.fill_between(years, allens[:,:].min(axis=0), allens[:,:].max(axis=0), color="red", alpha=0.2)
    h, pval2 = mk_test(ensmean, alpha=0.05)
    plt.plot(years, ensmean, "-", color="red", label="HiFLOR (AllForc, P-value=%.2f)" % (pval2), lw=2, alpha=0.5)
    if pval2 <=0.05:
        xx, yy, aa = linreg(years, ensmean)
        plt.plot(xx, yy, "--", color="red",lw=4, alpha=1.0)
    ax1.set_ylabel("Anomalous Days", fontsize=12)
    ax1.set_xlabel("Year", fontsize=12)
    
   # ---------------------------------------------
   #--model (HiFLOR NatForc)
    syear=1977
    eyear=2020

    data_ = tcnum_hiflor_nat[cl][(tcnum_hiflor_nat[cl]["cl"]==cl)&(tcnum_hiflor_nat[cl]["year"]<=eyear)&(tcnum_hiflor_nat[cl]["year"]>=syear)]['freq'].to_numpy()
    years = np.arange(syear,eyear+1,1)
    ymax = len(years)
    tmax = len(data_)
    emax = int(tmax/ymax)
    allens = data_.reshape((emax,ymax))
    ensmean = allens.mean(axis=0)
    
    r90 = np.ma.zeros((ymax,2)) # 90% range
    for tt in range(ymax):
        xx = np.array(allens[:,tt])
        r90[tt,:] = stats.poisson.interval(0.99, xx.mean(), loc=0)

    plt.fill_between(years, allens[:,:].min(axis=0), allens[:,:].max(axis=0), color="red", alpha=0.2)
    h, pval2 = mk_test(ensmean, alpha=0.05)
    plt.plot(years, ensmean, "-", color="cyan", label="HiFLOR (NatForc, P-value=%.2f)" % (pval2), lw=2, alpha=0.5)
    if pval2 <=0.05:
        xx, yy, aa = linreg(years, ensmean)
        plt.plot(xx, yy, "--", color="cyan",lw=4, alpha=1.0)
    ax1.set_ylabel("Anomalous Days", fontsize=12)
    ax1.set_xlabel("Year", fontsize=12)
    
   #--model (SPEAR AllForc)
    syear=1977
    eyear=2050
    
    data_ = tcnum_spear_all[cl][(tcnum_spear_all[cl]["cl"]==cl)&(tcnum_spear_all[cl]["year"]<=eyear)&(tcnum_spear_all[cl]["year"]>=syear)]['freq'].to_numpy()
    years = np.arange(syear,eyear+1,1)
    ymax = len(years)
    tmax = len(data_)
    emax = int(tmax/ymax)
    allens = data_.reshape((emax,ymax))
    ensmean = allens.mean(axis=0)
    
    r90 = np.ma.zeros((ymax,2)) # 90% range
    for tt in range(ymax):
        xx = np.array(allens[:,tt])
        r90[tt,:] = stats.poisson.interval(0.99, xx.mean(), loc=0)
    plt.fill_between(years, allens[:,:].min(axis=0), allens[:,:].max(axis=0), color="orange", alpha=0.2)
    h, pval2 = mk_test(ensmean, alpha=0.05)
    plt.plot(years, ensmean, "-", color="orange", label="SPEAR (AllForc, P-value=%.2f)" % (pval2), lw=2, alpha=0.5)
    if pval2 <=0.05:
        xx, yy, aa = linreg(years, ensmean)
        plt.plot(xx, yy, "--", color="orange",lw=4, alpha=1.0)
    ax1.set_ylabel("Anomalous days", fontsize=12)
    ax1.set_xlabel("Year", fontsize=12)
    
  ##--model (SPEAR NatForc)
    syear=1977
    eyear=2050

    data_ = tcnum_spear_nat[cl][(tcnum_spear_nat[cl]["cl"]==cl)&(tcnum_spear_nat[cl]["year"]<=eyear)&(tcnum_spear_nat[cl]["year"]>=syear)]['freq'].to_numpy()
    years = np.arange(syear,eyear+1,1)
    ymax = len(years)
    tmax = len(data_)
    emax = int(tmax/ymax)
    allens = data_.reshape((emax,ymax))
    ensmean = allens.mean(axis=0)
    
    r90 = np.ma.zeros((ymax,2)) # 90% range
    for tt in range(ymax):
        xx = np.array(allens[:,tt])
        r90[tt,:] = stats.poisson.interval(0.90, xx.mean(), loc=0)
    
    plt.fill_between(years, allens[:,:].min(axis=0), allens[:,:].max(axis=0), color="blue", alpha=0.2)
    h, pval3 = mk_test(ensmean, alpha=0.05)
    plt.plot(years, ensmean, "-", color="blue", label="SPEAR (NatForc, P-value=%.2f)" % (pval3), lw=2, alpha=0.5)
    if pval3 <=0.05:
        xx, yy, aa = linreg(years, ensmean)
        plt.plot(xx, yy, "--", color="blue",lw=4, alpha=1.0)
             
   #----------------     
   # Text P-value
   #----------------
    plt.legend(loc=2, prop={"size":12})

   #----------------
   # Title
   #----------------
    title = "%s" % regname[cl]
    plt.title("(%s) %s" % (caps[cl-1],title), horizontalalignment='center',fontsize=16, transform=ax1.transAxes, y=1.03)
    
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)

fig.tight_layout()
#plt.show()

fig.savefig("./Fig7.png")
